This is the code for the lecture video that goes over tree methods in Python. Refer to the video lecture for the full explanation of the code!
I also wrote a blog post explaining the general logic of decision trees and random forests, which you can check out.
In [48]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [8]:
# Read the kyphosis CSV (path is relative to the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='kyphosis.csv')
In [21]:
# Preview the first five rows of the loaded frame.
df.head(n=5)
Out[21]:
In [27]:
# Pairwise plots of the columns in df, coloured by the 'Kyphosis' column.
sns.pairplot(data=df, hue='Kyphosis', palette='Set1')
Out[27]:
In [13]:
from sklearn.model_selection import train_test_split
In [14]:
# Target is the 'Kyphosis' column; the features are every other column of df.
y = df.loc[:, 'Kyphosis']
X = df.drop(columns=['Kyphosis'])
In [15]:
# Hold out 30% of the rows for evaluation. random_state pins the shuffle so the
# split (and every metric computed from it below) is identical on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)
In [10]:
from sklearn.tree import DecisionTreeClassifier
In [11]:
# Decision tree with default hyperparameters; random_state makes the fitted
# tree reproducible from run to run.
dtree = DecisionTreeClassifier(random_state=42)
In [16]:
# Fit the tree on the training portion of the split.
dtree.fit(X=X_train, y=y_train)
Out[16]:
In [17]:
# Predict labels for the held-out rows with the fitted tree.
predictions = dtree.predict(X=X_test)
In [18]:
from sklearn.metrics import classification_report,confusion_matrix
In [19]:
# Classification report comparing the tree's predictions with the held-out labels.
print(classification_report(y_true=y_test, y_pred=predictions))
In [20]:
# Confusion matrix for the tree's predictions against the held-out labels.
print(confusion_matrix(y_true=y_test, y_pred=predictions))
In [33]:
# StringIO comes from the standard library: the vendored sklearn.externals.six
# module is no longer shipped with current scikit-learn, so importing it fails.
from io import StringIO

from IPython.display import Image
from sklearn.tree import export_graphviz
import pydot

# Take the labels from the feature frame itself rather than slicing df by
# position, so they stay correct wherever the target column sits in the CSV.
features = list(X.columns)
features
Out[33]:
In [39]:
# Export the fitted tree as Graphviz DOT text into an in-memory buffer.
dot_data = StringIO()
export_graphviz(
    dtree,
    out_file=dot_data,
    feature_names=features,
    filled=True,
    rounded=True,
)

# Parse the DOT text with pydot, then render the first parsed graph as a PNG
# and display it inline.
graph = pydot.graph_from_dot_data(dot_data.getvalue())
Image(graph[0].create_png())
Out[39]:
In [41]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 trees. random_state fixes the forest's internal
# randomness so the fitted model and its metrics repeat across runs.
rfc = RandomForestClassifier(n_estimators=100, random_state=42)
rfc.fit(X_train, y_train)
Out[41]:
In [45]:
# Predict labels for the held-out rows with the fitted forest.
rfc_pred = rfc.predict(X=X_test)
In [46]:
# Confusion matrix for the forest's predictions against the held-out labels.
print(confusion_matrix(y_true=y_test, y_pred=rfc_pred))
In [47]:
# Classification report comparing the forest's predictions with the held-out labels.
print(classification_report(y_true=y_test, y_pred=rfc_pred))